"""
@author: wangkang
@contact: wangkang@autobio.com.cn
@file: parse_json_entity.py
@time: 2020/4/1 14:46
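@desc: Collect the labelled entities of each entity type from data/file.json and append them to data/<entity type>.txt, one entity per line.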
"""
import os
import re
import json
from functools import reduce
# Project root: the parent of the directory that contains this script.
base_path = os.path.dirname(os.path.dirname(__file__))

# Entity categories to extract; each one is written to data/<category>.txt.
entity_types = ['设备类型', '部位', '具体方法', '子部位', '处理方向', '具体表现']

# For now, only the first 70 samples (lines) of the input are processed.
LABELED_SAMPLE_COUNT = 70


def _collect_entities(json_path, wanted_types, sample_limit):
    """Gather the distinct entity strings of each wanted type in one pass.

    Every line of ``json_path`` is parsed as a JSON object with a ``text``
    string and a ``labels`` list of ``[start, end, entity_type]`` triples;
    the entity string is ``text[start:end]``.

    Args:
        json_path: Path of the JSON-lines file to read.
        wanted_types: Entity type names to collect; other types are ignored.
        sample_limit: Only lines with index below this value are read.

    Returns:
        A dict mapping each wanted type to the set of entity strings found.

    Raises:
        OSError: If the input file cannot be opened.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
        KeyError: If a record lacks the ``text`` or ``labels`` key.
    """
    found = {wanted: set() for wanted in wanted_types}
    with open(json_path, 'r', encoding='utf8') as source:
        # Iterate lazily and stop at the limit instead of loading every line.
        for idx, line in enumerate(source):
            if idx >= sample_limit:
                break
            # Blank lines still count towards the index, so the sample window
            # is unchanged; they are skipped because json.loads rejects them.
            if not line.strip():
                continue
            record = json.loads(line)
            text = record['text']
            for entity_start, entity_end, entity_type in record['labels']:
                if entity_type in found:
                    found[entity_type].add(text[entity_start:entity_end])
    return found


# Parse the input once for all entity types rather than once per type.
_entities_by_type = _collect_entities(
    os.path.join(base_path, 'data/file.json'),
    entity_types,
    LABELED_SAMPLE_COUNT,
)

for entity_type in entity_types:
    output_path = os.path.join(base_path, 'data/%s.txt' % entity_type)
    # NOTE(review): append mode is kept from the original, so re-running the
    # script adds the same entities again — confirm this is intended.
    with open(output_path, 'a', encoding='utf8') as output:
        # Sorted so that the output order is reproducible between runs.
        output.writelines(
            entity_name + '\n'
            for entity_name in sorted(_entities_by_type[entity_type])
        )
    print('完成实体类型为%s的实体收集' % entity_type)
